// Computes the high-order 32 bits of the 64-bit product of two
// unsigned 32-bit integers.
// Max line length is 57, to fit in hacker.book. (But not used there.)
// Derived from Knuth's Algorithm M.
// Subscript 0 denotes the least significant half (little endian).
#include <stdio.h>
#include <stdlib.h>     // To define "exit", req'd by XLC.

// The program below takes 16 ops, 4 of which are multiplies,
// which are of the type unsigned 16 x 16 ==> 32.
// The statement "low = (w1 << 16) + (w0 & 0xFFFF);" placed just before
// the return statement, computes the low-order part in 3 more ops.

// Returns the high-order word of the double-width product u*v,
// treating both operands as unsigned.  Written for a 32-bit
// "unsigned": each operand is split into 16-bit halves and the
// four 16 x 16 partial products are combined schoolbook style.
unsigned mulhu(unsigned u, unsigned v) {
   // Low and high 16-bit halves of each operand.
   const unsigned u_lo = u & 0xFFFF, u_hi = u >> 16;
   const unsigned v_lo = v & 0xFFFF, v_hi = v >> 16;

   // Low partial product; only its upper half carries upward.
   const unsigned w0 = u_lo*v_lo;

   // First cross product plus the carry out of w0.  With 16-bit
   // halves this sum is at most 2^32 - 2^16, so it fits in 32 bits.
   const unsigned mid = u_hi*v_lo + (w0 >> 16);

   // Second cross product plus the low half of mid; its upper
   // half is the remaining carry into the high word.
   const unsigned w1 = u_lo*v_hi + (mid & 0xFFFF);

   // High partial product plus the carries from both cross terms.
   return u_hi*v_hi + (mid >> 16) + (w1 >> 16);
}

// Number of failed checks reported through error() so far.
int errors;

// Reports one failed check: increments the global error count and
// prints the two operands and the result actually obtained.
//   u, v  the operands that were multiplied
//   r     the result obtained (printed in hex and in decimal)
void error(int u, int v, int r) {
   errors = errors + 1;
   // %x requires an unsigned int argument, so convert explicitly;
   // the operands may have the top bit set (negative as int).
   printf("Error for u = %08x, v = %08x, got %08x (%d dec)\n",
      (unsigned)u, (unsigned)v, (unsigned)r, r);
}

int main() {
   int i, r, n;
   static int test[] = {0,0,0, 0,1,0, 0,0x7fffffff,0,
      0,0xffffffff,0, 1,0x7fffffff,0, 1,0xffffffff,0,
      0xffff,0xffff,0, 0x10000,0xffff,0, 0x100000,0xffff,0xf,
      0xfffff,0xeeeee,0xee, 0x7fffffff,0x7eeeeeee,0x3f777776,
      0x7fffffff,0x7fffffff,0x3fffffff, 0xffffffff,0xffffffff,0xFFFFFFFE,
      0xffff8000,0xffff8000,0xffff0000, 0xffff0000,0x10000,0xffff,
      0xfffe7960,0x186a0,0x1869d,
      0x80000000,0x7fffffff,0x3fffffff,
      0x80000000,0x80000000,0x40000000,
      0xc0000000,0xc0000000, 0x90000000};

   n = sizeof(test)/4;

   printf("mulhu:\n");
   for (i = 0; i < n; i += 3) {
      r = mulhu(test[i], test[i+1]);
      if (r != test[i+2]) error(test[i], test[i+1], r);
      r = mulhu(test[i+1], test[i]);
      if (r != test[i+2]) error(test[i+1], test[i], r);}

   if (errors == 0)
      printf("Passed all %d cases.\n", n/3);
}
